This is the code used to analyze and visualize the data used in the manuscript of Reuter et al. 2025

Set working directory

setwd("/Users/spagnuolo/Desktop/Reuter et el. 2025_final/Raw_data/Data_sets_Phage_titers")

Load required packages

## General packages required for analysis of all experiments
### Data organization and structuring
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2
## ──
## ✔ ggplot2 3.4.4     ✔ purrr   1.0.2
## ✔ tibble  3.2.1     ✔ stringr 1.5.1
## ✔ readr   2.1.2     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ car::recode()   masks dplyr::recode()
## ✖ purrr::some()   masks car::some()
### Data visualization
library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
library(ggthemes)
library(plotrix)
library(scales) # for trans_beaks when axis is log-transformed
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:plotrix':
## 
##     rescale
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
library(ggpubr)

# Experiment 3: Adsorption constant analysis
library(multcomp) # Pairwise comparison
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## 
## Attaching package: 'TH.data'
## 
## The following object is masked from 'package:MASS':
## 
##     geyser
library(lme4) # Linear mixed model
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
# Experiment 4: Images analysis plaques
library (viridis)
## Loading required package: viridisLite
## 
## Attaching package: 'viridis'
## 
## The following object is masked from 'package:scales':
## 
##     viridis_pal
# Experiment 6: Decay analysis
library(stringr)

Experiment 1: 3-hour transfer regime

Code was used to monitor MOI input for each transfer.

READ ME Specification of columns in data set

Visualize Bacteria and Phage concentrations for each 3-h transfer

Load data set

df<-read.csv("experiment_1_3_h_transfers.csv")

# df should have 28 observations of 6 variables

Checking data set

str(df) # structure of the columns
## 'data.frame':    28 obs. of  6 variables:
##  $ Transfer    : chr  "T1" "T1" "T1" "T1" ...
##  $ Line        : chr  "L1" "L2" "L3" "L4" ...
##  $ CFU         : int  76 76 76 76 76 98 98 98 98 98 ...
##  $ Dilution_CFU: num  1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 ...
##  $ PFU         : int  120 21 238 88 44 21 26 89 19 132 ...
##  $ Dilution_PFU: num  1e+07 1e+07 1e+07 1e+07 1e+07 1e+07 1e+07 1e+07 1e+07 1e+07 ...
summary(df) # summary of the columns
##    Transfer             Line                CFU         Dilution_CFU   
##  Length:28          Length:28          Min.   : 76.0   Min.   : 10000  
##  Class :character   Class :character   1st Qu.: 98.0   1st Qu.:100000  
##  Mode  :character   Mode  :character   Median :150.0   Median :100000  
##                                        Mean   :248.3   Mean   : 83929  
##                                        3rd Qu.:373.0   3rd Qu.:100000  
##                                        Max.   :644.0   Max.   :100000  
##       PFU          Dilution_PFU      
##  Min.   : 19.00   Min.   : 10000000  
##  1st Qu.: 30.50   1st Qu.: 10000000  
##  Median : 63.50   Median : 10000000  
##  Mean   : 66.82   Mean   : 19642857  
##  3rd Qu.: 87.25   3rd Qu.: 10000000  
##  Max.   :238.00   Max.   :100000000
# Turn Transfer and Line into factors
df$Transfer=as.factor(df$Transfer)
df$Line=as.factor(df$Line)

Calculate CFU per ml and PFU per ml

# Calculate CFU per ml by multiplying CFU counts and Dilution factor and multiply by an additional factor 10 to get CFU per ml because 100 µl of the plating dilution "Dilution_CFU" was used for plating
df$CFU_ml=df$CFU*df$Dilution_CFU*10

# Calculate PFU per ml by multiplying PFU counts and Dilution factor and multiply by an additional factor 10 to get PFU per ml because 100 µl of the plating dilution "Dilution_PFU" was used for the plaque assay
df$PFU_ml=df$PFU*df$Dilution_PFU*10

Visualize bacteria and phage concentration for each transfer using a dotplot

# Bacteria concentration
a=ggplot(df,aes(Transfer,CFU_ml))+
  geom_point()+ # dot plot
  xlab("Transfer")+ # label of x-axis
  ylab("E.coli C concentration (CFU/ml)")+ # label of y-axis
    ggtitle("Bacteria concentration")+ # plot title
theme_bw() #plot layout
a

# Virus concentration for each selection line separately until first small plaques were detected (comments on plot b also apply to plots c-f)
## Selection line 1
b=ggplot(df[df$Line=="L1",],aes(Transfer,PFU_ml))+
  geom_point(color="blue")+ # dot plot
  scale_y_continuous(trans="log10", limits=c(1e+09,6e+010))+ # log transformed y-axis
  xlab("Transfer")+ # label of x-axis
  ylab("PhiX174 concentration (PFU/ml)")+ #label of y-axis
  ggtitle("Selection line L1")+ # plot title
theme_bw() # plot layout

## Selection line 2
c=ggplot(df[df$Line=="L2",],aes(Transfer,PFU_ml))+
  geom_point(color="purple")+  
  scale_y_continuous(trans="log10", limits=c(1e+09,6e+010))+
  xlab("Transfer")+
  ylab("PhiX174 concentration (PFU/ml)")+
  ggtitle("Selection line L2")+
  theme_bw()

## Selection line 3
d=ggplot(df[df$Line=="L3",],aes(Transfer,PFU_ml))+
  geom_point(color="green")+
  scale_y_continuous(trans="log10", limits=c(1e+09,6e+010))+
  xlab("Transfer")+
  ylab("PhiX174 concentration (PFU/ml)")+
  ggtitle("Selection line L3")+
  theme_bw()

## Selection line 4
e=ggplot(df[df$Line=="L4",],aes(Transfer,PFU_ml))+
  geom_point(color="red")+
  scale_y_continuous(trans="log10", limits=c(1e+09,6e+10))+
  xlab("Transfer")+
  ylab("PhiX174 concentration (PFU/ml)")+
  ggtitle("Selection line L4")+
  theme_bw()

## Selection line 5
f=ggplot(df[df$Line=="L5",],aes(Transfer,PFU_ml))+
  geom_point(color="orange")+
  scale_y_continuous(trans="log10", limits=c(1e+09,6e+010))+
  xlab("Transfer")+
  ylab("PhiX174 concentration (PFU/ml)")+
  ggtitle("Selection line L5")+
  theme_bw()

# Combine phage concentration plots for all selection lines in one plot
grid.arrange(b,c,d,e,f)

Experiment 1: Transfers required to observe first de-novo small plaque mutants and their frequency

Code used to generate Fig. S2A in the manuscript

READ ME Specification of columns in data set

Load data set

# Create data frame with number of transfers required to get first small plaques
df=read.csv("experiment_1_3_h_transfers_small_plaque_frequency.csv")

# df should have 5 observations of 5 variables

Checking data set

str(df) #structure of the dataset
## 'data.frame':    5 obs. of  5 variables:
##  $ Line           : chr  "L1" "L2" "L3" "L4" ...
##  $ Genotype       : chr  "F(G322D)" "F(G322D)" "F(M213I)" "F(S427*)" ...
##  $ Transfer_number: int  6 7 5 4 6
##  $ Total_PFU_ml   : num  1.9e+10 8.8e+09 7.2e+09 5.8e+09 8.9e+09
##  $ Small_PFU_ml   : num  1e+09 1e+08 1e+08 1e+08 1e+08
# Convert Line and Genotype into factors 
df$Genotype=as.factor(df$Genotype)
df$Line=as.factor(df$Line)

Calculate frequency in % of small plaques compared to the total phage concentration

# Create a new column for the frequency of small plaques in %
df$Frequency=NA

# Calculate the frequency in %
df$Frequency=(df$Small_PFU_ml)/(df$Total_PFU_ml*0.01)

# show frequencies
print(df$Frequency)
## [1] 5.263158 1.136364 1.388889 1.724138 1.123596

Visualize results using a barplot (Fig. S2A)

# Code to get the x-ticks in two lines including selection line AND genotype
addline_format <- function(x,...){
    gsub('\\s','\n',x)
}


# Visualize results
x=ggplot(df,aes(x=reorder(Line, -Transfer_number),y=Transfer_number, group=Genotype, fill=Genotype))+
  geom_col()+ # bar chart
  coord_flip()+ # flip x and y axis
  scale_fill_manual(values=c("#c6d325","#CC97A7", "#ef7c00"))+ # fill colors based on genotype
  scale_x_discrete(limits=c("L2","L1","L5","L3","L4"), 
                   labels=addline_format(c("F(G322D) L2","F(G322D) L1","F(G322D) L5","F(M213I) L3","F(S427*) L4")))+ # change y-axis labeling (x command because axes are flipped)
   annotate("text",x=5, y=1,
           label = "1.72% small plaques",  size = 5)+ # add frequency Line 4: 1.72%
   annotate("text",x=4, y=1,
           label = "1.39% small plaques",  size = 5)+ # add frequency Line 3: 1.39%
  annotate("text",x=3, y=1,
           label = "1.12% small plaques",  size = 5)+ # add frequency Line 5: 1.12%
    annotate("text",x=2, y=1,
           label = "5.26% small plaques",  size = 5)+ # add frequency Line 1: 5.26%
    annotate("text",x=1, y=1,
           label = "1.14% small plaques",  size = 5)+ # add frequency Line 2: 1.14%
  ylab("Number of 3-h transfer")+ # label x axis (after flipping)
  xlab("Genotype")+ # label y-axis (after flipping)
  theme_bw()+ # plot layout
  theme(legend.position="none")+ # not legend
    theme(axis.text=element_text(size=12),
        axis.title=element_text(size=14)) # font size
x

# Same image as high resolution jpeg file
# ggsave("mut_freq_3h.jpeg", plot = x, dpi = 600, width = 10, height = 6)

Experiment 2: 30-min transfer regime

Code was used to monitor MOI input for each transfer.

READ ME Specification of columns in data set

-Inoculation_volume_ml: Volume of phage lysate put into each transfer. For transfer 2 to 5 on each passage day 100 µl was used. For the first transfer of each passage day the phage input volume was calculated to get an MOI input of ~0.1

** Visualize Bacteria and Phage concentrations for each 30-min transfer**

Load data set

df=read.csv("experiment_2_30_min_transfer.csv")

# df contains 275 observations of 15 variables

Checking data set

str(df) # structure of columns
## 'data.frame':    275 obs. of  15 variables:
##  $ Passage                 : chr  "P1" "P1" "P1" "P1" ...
##  $ Transfer                : chr  "T1" "T1" "T1" "T1" ...
##  $ Passage_ID              : int  1 1 1 1 1 2 2 2 2 2 ...
##  $ Line                    : chr  "L1" "L2" "L3" "L4" ...
##  $ CFU                     : int  212 212 212 212 212 1260 1260 1260 1260 1260 ...
##  $ Dilution_CFU            : num  1e+05 1e+05 1e+05 1e+05 1e+05 1e+04 1e+04 1e+04 1e+04 1e+04 ...
##  $ CFU_ml                  : num  2.12e+08 2.12e+08 2.12e+08 2.12e+08 2.12e+08 1.26e+08 1.26e+08 1.26e+08 1.26e+08 1.26e+08 ...
##  $ PFU                     : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ Dilution_PFU            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ PFU_ml                  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Spot_assay_dilution     : num  1e+05 1e+05 1e+05 1e+04 1e+04 1e+05 1e+07 1e+05 1e+05 1e+07 ...
##  $ Output_phage_ml         : num  5e+07 5e+07 5e+07 5e+06 5e+06 5e+07 5e+09 5e+07 5e+07 5e+09 ...
##  $ Inoculation_volume_ml   : num  0.0135 0.0135 0.0135 0.0135 0.0135 0.1 0.1 0.1 0.1 0.1 ...
##  $ Real_virus_concentration: num  9990000 9990000 9990000 9990000 9990000 1000000 1000000 1000000 100000 100000 ...
##  $ Real_MOI                : num  0.0471 0.0471 0.0471 0.0471 0.0471 ...
summary(df) # summary of the columns
##    Passage            Transfer           Passage_ID        Line          
##  Length:275         Length:275         Min.   : 1.00   Length:275        
##  Class :character   Class :character   1st Qu.:10.00   Class :character  
##  Mode  :character   Mode  :character   Median :23.00   Mode  :character  
##                                        Mean   :23.91                     
##                                        3rd Qu.:37.00                     
##                                        Max.   :50.00                     
##                                                                          
##       CFU          Dilution_CFU        CFU_ml               PFU       
##  Min.   :  88.0   Min.   : 10000   Min.   : 68000000   Min.   : 16.0  
##  1st Qu.: 215.0   1st Qu.: 10000   1st Qu.:112000000   1st Qu.: 44.0  
##  Median :1116.0   Median : 10000   Median :140000000   Median : 98.5  
##  Mean   : 985.2   Mean   : 36182   Mean   :142541818   Mean   :103.1  
##  3rd Qu.:1464.0   3rd Qu.:100000   3rd Qu.:170000000   3rd Qu.:145.8  
##  Max.   :1944.0   Max.   :100000   Max.   :249000000   Max.   :255.0  
##                                                        NA's   :225    
##   Dilution_PFU         PFU_ml          Spot_assay_dilution Output_phage_ml    
##  Min.   :1.0e+07   Min.   :7.300e+09   Min.   :   10000    Min.   :5.000e+06  
##  1st Qu.:1.0e+07   1st Qu.:1.578e+10   1st Qu.:  100000    1st Qu.:5.000e+07  
##  Median :1.0e+08   Median :3.200e+10   Median :  100000    Median :5.000e+08  
##  Mean   :6.4e+07   Mean   :5.099e+10   Mean   :  717746    Mean   :9.984e+09  
##  3rd Qu.:1.0e+08   3rd Qu.:5.675e+10   3rd Qu.: 1000000    3rd Qu.:5.000e+08  
##  Max.   :1.0e+08   Max.   :2.550e+11   Max.   :10000000    Max.   :2.550e+11  
##  NA's   :225       NA's   :225         NA's   :62          NA's   :12         
##  Inoculation_volume_ml Real_virus_concentration    Real_MOI       
##  Min.   :0.0001955     Min.   :   100000        Min.   :0.000558  
##  1st Qu.:0.1000000     1st Qu.:  1000000        1st Qu.:0.007081  
##  Median :0.1000000     Median :  9990000        Median :0.044467  
##  Mean   :0.0810217     Mean   :  8947948        Mean   :0.064543  
##  3rd Qu.:0.1000000     3rd Qu.: 10000000        3rd Qu.:0.095495  
##  Max.   :0.1000000     Max.   :100000000        Max.   :0.896057  
##                        NA's   :7                NA's   :7
# Passage P2 failed to produce detectable infectious particles after Transfer 3. Therefore all 5 transfers of P2 were excluded from analysis. P2 was repeated as "P2b" for all five transfers
df<-df[df$Passage!="P2",]

# Set Block, Passage, Transfer, Line to factors
df$Passage<-as.factor(df$Passage)
df$Transfer<-as.factor(df$Transfer)
df$Line<-as.factor(df$Line)

Data visualization

Set general plotting settings

# Set color pallet
cbPalette <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#999999")

# Set level order of passages for plotting
df$Passage <- factor(df$Passage , levels=c("P1", "P2b", "P3", "P4", "P5", "P6", "P7", "P8", "P9", "P10"))

Plot bacteria concentration

# Plot bacteria concentration for each transfer
a<-ggplot(df,aes(Passage,CFU_ml, color=Transfer))+
  geom_point()+ # dot plot
    scale_color_manual(values=cbPalette)+ # color settings
  xlab("Passage day")+ # label x-axis
  ylab("CFU/ml")+ # label y-axis
  theme_base()+ # plot layout
  theme(text = element_text(size = 9),
         axis.title = element_text(size =9 ),
         axis.text.x = element_text(angle=90,size = 9),
         legend.position = "bottom") # font size and legend position
a

Visualize real MOI input for each transfer individually for each selection line using dotplots

# Visualize all MOI's in one plot (e.g. violin plot with different lines)
ggplot(df,aes(Line, Real_MOI, fill=Line))+
  geom_violin()+ # violin plot
  scale_y_continuous(trans="log10")+ # log transformation of y-axis
  theme_bw() # plot layout

Visualize phage concentration

Note –> Phages after each 5th transfer T5 is accurate (after 3h incubation), was isolated using chloroform, the other 4x30 min titers were determined after filtration using a spot test, which was too inaccurate. To quantify correct free phage titer after 30-min transfers, see Experiment 6: Free phage titer quantification

Plot phage concentrations after each 5th transfer (3-h amplification), quantified by plaque assay after chloroform extraction

# Phage concentration for individual selection lines
## Subset dataframe to only transfer 5 for each passage
df1<-df[df$Transfer=="T5",]

# Plot phage titers for each selection line in an individual plot (comments on plot h apply also to plots i-l)

## Line 1
h<-ggplot(df1[df1$Line=="L1",],aes(Passage,Output_phage_ml, group=Transfer, color=Transfer))+
  geom_point(position=position_jitterdodge())+ # dot plot with jitter to avoid point overlapping
   scale_color_manual(values = cbPalette)+ # color setting
      scale_x_discrete(limits = c("P1","P2b","P3","P4","P5","P6","P7","P8","P9","P10"))+ # order of passages
  xlab("Passage day")+ # label x-axis
  ylab("Virus concentration (viruses/ml)")+ # label y-axis
  ggtitle(("Selection line 1"))+ # plot title
  theme_base()+ # plot layout
  theme(text = element_text(size = 9),
         axis.title = element_text(size =9 ),
         axis.text.x = element_text(angle=90,size = 9),
         legend.position = "none") # font size and legend position
h

## Line 2
i<-ggplot(df1[df1$Line=="L2",],aes(Passage,Output_phage_ml, group=Transfer, color=Transfer))+
  geom_point(position=position_jitterdodge())+
   scale_color_manual(values = cbPalette)+
      scale_x_discrete(limits = c("P1","P2b","P3","P4","P5","P6","P7","P8","P9","P10"))+
  xlab("Passage day")+
  ylab("Virus concentration (viruses/ml)")+   
  ggtitle(("Selection line 2"))+
  theme_base()+
  theme(text = element_text(size = 9),
         axis.title = element_text(size =9 ),
         axis.text.x = element_text(angle=90,size = 9),
         legend.position = "none")
i

# Line 3
j<-ggplot(df1[df1$Line=="L3",],aes(Passage,Output_phage_ml, group=Transfer, color=Transfer))+
  geom_point(position=position_jitterdodge())+
   scale_color_manual(values = cbPalette)+
      scale_x_discrete(limits = c("P1","P2b","P3","P4","P5","P6","P7","P8","P9","P10"))+
  xlab("Passage day")+
  ylab("Virus concentration (viruses/ml)")+   
  ggtitle(("Selection line 3"))+
  theme_base()+
  theme(text = element_text(size = 9),
         axis.title = element_text(size =9 ),
         axis.text.x = element_text(angle=90,size = 9),
         legend.position = "none")
j

# Line 4
k<-ggplot(df1[df1$Line=="L4",],aes(Passage,Output_phage_ml, group=Transfer, color=Transfer))+
  geom_point(position=position_jitterdodge())+
   scale_color_manual(values = cbPalette)+
      scale_x_discrete(limits = c("P1","P2b","P3","P4","P5","P6","P7","P8","P9","P10"))+
  xlab("Passage day")+
  ylab("Virus concentration (viruses/ml)")+   
  ggtitle(("Selection line 4"))+
  theme_base()+
  theme(text = element_text(size = 9),
         axis.title = element_text(size =9 ),
         axis.text.x = element_text(angle=90,size = 9),
         legend.position = "none")
k

# Line 5
l<-ggplot(df1[df1$Line=="L5",],aes(Passage,Output_phage_ml, group=Transfer, color=Transfer))+
  geom_point(position=position_jitterdodge())+
   scale_color_manual(values = cbPalette)+
      scale_x_discrete(limits = c("P1","P2b","P3","P4","P5","P6","P7","P8","P9","P10"))+
  xlab("Passage day")+
  ylab("Virus concentration (viruses/ml)")+   
  ggtitle(("Selection line 5"))+
  theme_base()+
  theme(text = element_text(size = 9),
         axis.title = element_text(size =9 ),
         axis.text.x = element_text(angle=90,size = 9),
         legend.position = "none")
l

# Combined plots of all lines
grid.arrange(h,i,j,k,l)

Experiment 3: Adsorption constant analysis

Code used to generate Fig. S3A in the manuscript

READ ME Specification of columns in data set

Load data set

df<-read.csv("experiment_3_adsorption_assay.csv")

# -> 69 observations of 14 variables

Checking data set

str(df)# structure of columns
## 'data.frame':    69 obs. of  14 variables:
##  $ Genotype         : chr  "G322D" "G322D" "G322D" "G322D" ...
##  $ Run              : int  1 1 1 2 2 2 3 3 3 4 ...
##  $ Sampling_time_min: int  2 4 6 2 4 6 2 4 6 2 ...
##  $ PFU_1            : int  51 18 22 68 152 36 36 33 42 191 ...
##  $ PFU_2            : int  38 18 20 81 146 29 32 68 52 151 ...
##  $ PFU_3            : int  49 24 18 72 96 37 15 40 71 129 ...
##  $ Median_PFU       : int  49 18 20 72 146 36 32 40 52 151 ...
##  $ Predilution      : int  10 10 10 10 10 10 10 10 10 10 ...
##  $ Dilution_plating : num  1000 100 10 1000 100 10 1000 100 10 1000 ...
##  $ Dilution_in_ml   : int  10 10 10 10 10 10 10 10 10 10 ...
##  $ PFU_ml           : num  4900000 180000 20000 7200000 1460000 36000 3200000 400000 52000 15100000 ...
##  $ CFU              : int  202 202 202 121 121 121 251 251 251 221 ...
##  $ Dilution_CFU     : num  1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 ...
##  $ CFU_ml           : num  2.02e+08 2.02e+08 2.02e+08 1.21e+08 1.21e+08 1.21e+08 2.51e+08 2.51e+08 2.51e+08 2.21e+08 ...
summary(df) # summary of columns
##    Genotype              Run        Sampling_time_min     PFU_1       
##  Length:69          Min.   :1.000   Min.   :2         Min.   : 17.00  
##  Class :character   1st Qu.:2.000   1st Qu.:2         1st Qu.: 33.00  
##  Mode  :character   Median :4.000   Median :4         Median : 57.00  
##                     Mean   :4.174   Mean   :4         Mean   : 71.97  
##                     3rd Qu.:6.000   3rd Qu.:6         3rd Qu.: 94.00  
##                     Max.   :7.000   Max.   :6         Max.   :261.00  
##      PFU_2            PFU_3          Median_PFU      Predilution
##  Min.   : 13.00   Min.   : 14.00   Min.   : 14.00   Min.   :10  
##  1st Qu.: 37.00   1st Qu.: 40.00   1st Qu.: 39.00   1st Qu.:10  
##  Median : 61.00   Median : 61.00   Median : 58.00   Median :10  
##  Mean   : 72.94   Mean   : 74.13   Mean   : 72.75   Mean   :10  
##  3rd Qu.: 95.00   3rd Qu.: 96.00   3rd Qu.: 91.00   3rd Qu.:10  
##  Max.   :312.00   Max.   :367.00   Max.   :261.00   Max.   :10  
##  Dilution_plating Dilution_in_ml     PFU_ml              CFU       
##  Min.   :   1.0   Min.   :10     Min.   :    2800   Min.   :121.0  
##  1st Qu.:  10.0   1st Qu.:10     1st Qu.:   47000   1st Qu.:196.0  
##  Median : 100.0   Median :10     Median :  390000   Median :221.0  
##  Mean   : 190.4   Mean   :10     Mean   : 1229058   Mean   :235.2  
##  3rd Qu.: 100.0   3rd Qu.:10     3rd Qu.: 1130000   3rd Qu.:267.0  
##  Max.   :1000.0   Max.   :10     Max.   :15100000   Max.   :391.0  
##   Dilution_CFU       CFU_ml         
##  Min.   :1e+05   Min.   :121000000  
##  1st Qu.:1e+05   1st Qu.:196000000  
##  Median :1e+05   Median :221000000  
##  Mean   :1e+05   Mean   :235217391  
##  3rd Qu.:1e+05   3rd Qu.:267000000  
##  Max.   :1e+05   Max.   :391000000
# Convert Genotype and Run into factors
df$Genotype=as.factor(df$Genotype)
df$Run=as.factor(df$Run)

Fitting linear model to log-transformed free phage titers to determine the adsorption curve slopes

# Log transform free phage concentrations
df$log_PFU_ml=log(df$PFU_ml)

Plot adsorption curves within run with all genotypes

# Comments of plot a also apply to plots b-g
# Run 1
a=ggplot(df[df$Run=="1",],aes(Sampling_time_min,PFU_ml, group=Genotype, color=Genotype))+
  geom_point()+ # dot plot
  scale_y_continuous(trans="log10")+ # log transformation of y-axis
  scale_color_manual(values=c("#c6d325","#ef7c00","#29485d"))+ # set color palette
  theme_bw() # plot layout
a

# Run 2
b=ggplot(df[df$Run=="2",],aes(Sampling_time_min,PFU_ml, group=Genotype, color=Genotype))+
  geom_point()+
  scale_y_continuous(trans="log10")+
  scale_color_manual(values=c("#c6d325","#ef7c00","#29485d"))+
  theme_bw()
b 

# Run 3
c=ggplot(df[df$Run=="3",],aes(Sampling_time_min,PFU_ml, group=Genotype, color=Genotype))+
  geom_point()+
  scale_y_continuous(trans="log10")+
  scale_color_manual(values=c("#c6d325","#ef7c00","#29485d"))+
  theme_bw()
c

# Run 4
d=ggplot(df[df$Run=="4",],aes(Sampling_time_min,PFU_ml, group=Genotype, color=Genotype))+
  geom_point()+
  scale_y_continuous(trans="log10")+
  scale_color_manual(values=c("#c6d325","#ef7c00","#29485d"))+
  theme_bw()
d

# Run 5
e=ggplot(df[df$Run=="5",],aes(Sampling_time_min,PFU_ml, group=Genotype, color=Genotype))+
  geom_point()+
  scale_y_continuous(trans="log10")+
  scale_color_manual(values=c("#c6d325","#ef7c00","#ae017e","#29485d"))+
  theme_bw()
e

# Run 6
f=ggplot(df[df$Run=="6",],aes(Sampling_time_min,PFU_ml, group=Genotype, color=Genotype))+
  geom_point()+
  scale_y_continuous(trans="log10")+
  scale_color_manual(values=c("#c6d325","#ae017e","#29485d"))+
  theme_bw()
f

# Run 7
g=ggplot(df[df$Run=="7",],aes(Sampling_time_min,PFU_ml, group=Genotype, color=Genotype))+
  geom_point()+
 # geom_smooth(method="lm", linetype="dashed", se=F)+
  scale_y_continuous(trans="log10")+
  scale_color_manual(values=c("#c6d325","#ef7c00","#ae017e","#29485d"))+
  theme_bw()
g

Creat a dataframe to record slopes and R^2 values received as output from the model fit

# Creat a data set to record slopes and R^2 values for each genotype and run, using subset df to time point 2 as a template
dfslope=df[df$Sampling_time_min==2,]

#-> dfslope should contain 23 observations of 6 variables

# Subset slope data set to only columns Run, Genotype and CFU_ml (Bacteria concentration needed for adsorption constant calculation)
dfslope=dfslope[,c(2,1,14)]

# Create unique identifier combining Genotype and Run
dfslope$ID=paste(dfslope$Run,dfslope$Genotype,sep="_")

# Create columns to record slope and R^2 values
dfslope$slope=NA 
dfslope$R2=NA # multiple R^2

Model fitting using a linear model for each genotype and run individually

Ancestor

# Each model represents an individual experimental run

a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="WT" &df$Run==1,])
summary(a) #slope=-0.99138, R^2= 0.9989
## 
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype == 
##     "WT" & df$Run == 1, ])
## 
## Residuals:
##       49       50       51 
##  0.03783 -0.07566  0.03783 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       17.32905    0.14155  122.42   0.0052 **
## Sampling_time_min -0.99138    0.03276  -30.26   0.0210 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09267 on 1 degrees of freedom
## Multiple R-squared:  0.9989, Adjusted R-squared:  0.9978 
## F-statistic: 915.6 on 1 and 1 DF,  p-value: 0.02103
dfslope[dfslope$ID=="1_WT",]$slope=- 0.99138
dfslope[dfslope$ID=="1_WT",]$R2= 0.9989

a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="WT" &df$Run==2,])
summary(a) #slope=-0.83966 ,  R^2=0.9997
## 
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype == 
##     "WT" & df$Run == 2, ])
## 
## Residuals:
##       52       53       54 
##  0.01778 -0.03557  0.01778 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       17.40857    0.06654  261.62  0.00243 **
## Sampling_time_min -0.83966    0.01540  -54.52  0.01168 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04356 on 1 degrees of freedom
## Multiple R-squared:  0.9997, Adjusted R-squared:  0.9993 
## F-statistic:  2972 on 1 and 1 DF,  p-value: 0.01168
dfslope[dfslope$ID=="2_WT",]$slope=- 0.83966
dfslope[dfslope$ID=="2_WT",]$R2= 0.9997

a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="WT" &df$Run==3,])
summary(a) #slope=-0.78892, R^2=0.9984
## 
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype == 
##     "WT" & df$Run == 3, ])
## 
## Residuals:
##       55       56       57 
## -0.03669  0.07337 -0.03669 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       16.26296    0.13727  118.47  0.00537 **
## Sampling_time_min -0.78892    0.03177  -24.83  0.02562 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08986 on 1 degrees of freedom
## Multiple R-squared:  0.9984, Adjusted R-squared:  0.9968 
## F-statistic: 616.6 on 1 and 1 DF,  p-value: 0.02562
dfslope[dfslope$ID=="3_WT",]$slope=- 0.78892
dfslope[dfslope$ID=="3_WT",]$R2= 0.9984

a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="WT" &df$Run==4,])
summary(a) #slope=-0.9439,  R^2=0.944
## 
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype == 
##     "WT" & df$Run == 4, ])
## 
## Residuals:
##      58      59      60 
##  0.2654 -0.5308  0.2654 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        16.1557     0.9931  16.268   0.0391 *
## Sampling_time_min  -0.9439     0.2299  -4.106   0.1521  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6501 on 1 degrees of freedom
## Multiple R-squared:  0.944,  Adjusted R-squared:  0.888 
## F-statistic: 16.86 on 1 and 1 DF,  p-value: 0.1521
dfslope[dfslope$ID=="4_WT",]$slope=- 0.9439
dfslope[dfslope$ID=="4_WT",]$R2= 0.944

a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="WT" &df$Run==5,])
summary(a) #slope=-0.7034, R^2 0.8892
## 
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype == 
##     "WT" & df$Run == 5, ])
## 
## Residuals:
##      61      62      63 
##  0.2867 -0.5734  0.2867 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        14.3377     1.0728  13.365   0.0475 *
## Sampling_time_min  -0.7034     0.2483  -2.833   0.2161  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7023 on 1 degrees of freedom
## Multiple R-squared:  0.8892, Adjusted R-squared:  0.7784 
## F-statistic: 8.024 on 1 and 1 DF,  p-value: 0.2161
dfslope[dfslope$ID=="5_WT",]$slope=- 0.7034
dfslope[dfslope$ID=="5_WT",]$R2= 0.8892

a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="WT" &df$Run==6,])
summary(a) #slope=-0.9422,  R^2=0.9631
## 
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype == 
##     "WT" & df$Run == 6, ])
## 
## Residuals:
##      64      65      66 
##  0.2128 -0.4257  0.2128 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        15.3928     0.7964  19.328   0.0329 *
## Sampling_time_min  -0.9422     0.1843  -5.112   0.1230  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5214 on 1 degrees of freedom
## Multiple R-squared:  0.9631, Adjusted R-squared:  0.9263 
## F-statistic: 26.13 on 1 and 1 DF,  p-value: 0.123
dfslope[dfslope$ID=="6_WT",]$slope=- 0.9422
dfslope[dfslope$ID=="6_WT",]$R2= 0.9631

a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="WT" &df$Run==7,])
summary(a) #slope=-0.9246,  slope=0.9389
## 
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype == 
##     "WT" & df$Run == 7, ])
## 
## Residuals:
##      67      68      69 
##  0.2723 -0.5446  0.2723 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        15.4412     1.0188  15.156   0.0419 *
## Sampling_time_min  -0.9246     0.2358  -3.921   0.1590  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.667 on 1 degrees of freedom
## Multiple R-squared:  0.9389, Adjusted R-squared:  0.8779 
## F-statistic: 15.37 on 1 and 1 DF,  p-value: 0.159
dfslope[dfslope$ID=="7_WT",]$slope=- 0.9246
dfslope[dfslope$ID=="7_WT",]$R2= 0.9389

Small plaque mutant F(G322D)

# Each model represents an individual experimental run

a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="G322D" &df$Run==1,])
summary(a) #slope=-1.3753, R^2=0.9867
## 
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype == 
##     "G322D" & df$Run == 1, ])
## 
## Residuals:
##       1       2       3 
##  0.1845 -0.3689  0.1845 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        17.9709     0.6902  26.037   0.0244 *
## Sampling_time_min  -1.3753     0.1598  -8.609   0.0736 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4519 on 1 degrees of freedom
## Multiple R-squared:  0.9867, Adjusted R-squared:  0.9734 
## F-statistic: 74.11 on 1 and 1 DF,  p-value: 0.07362
dfslope[dfslope$ID=="1_G322D",]$slope=- 1.3753
dfslope[dfslope$ID=="1_G322D",]$R2= 0.9867

a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="G322D" &df$Run==2,])
summary(a) #slope=-1.3246 , R^2= 0.9499
## 
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype == 
##     "G322D" & df$Run == 2, ])
## 
## Residuals:
##       4       5       6 
## -0.3512  0.7023 -0.3512 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        18.7899     1.3140  14.300   0.0444 *
## Sampling_time_min  -1.3246     0.3041  -4.355   0.1437  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8602 on 1 degrees of freedom
## Multiple R-squared:  0.9499, Adjusted R-squared:  0.8998 
## F-statistic: 18.97 on 1 and 1 DF,  p-value: 0.1437
dfslope[dfslope$ID=="2_G322D",]$slope=- 1.3246
dfslope[dfslope$ID=="2_G322D",]$R2= 0.9499

a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="G322D" &df$Run==3,])
summary(a) #slope-1.029916, R^21
## 
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype == 
##     "G322D" & df$Run == 3, ])
## 
## Residuals:
##         7         8         9 
##  0.006537 -0.013074  0.006537 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       17.031956   0.024458   696.4 0.000914 ***
## Sampling_time_min -1.029916   0.005661  -181.9 0.003499 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01601 on 1 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:  0.9999 
## F-statistic: 3.31e+04 on 1 and 1 DF,  p-value: 0.003499
dfslope[dfslope$ID=="3_G322D",]$slope=- 1.029916
dfslope[dfslope$ID=="3_G322D",]$R2= 1

a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="G322D" &df$Run==4,])
summary(a) #slope=-1.3544,  R^2=0.888
## 
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype == 
##     "G322D" & df$Run == 4, ])
## 
## Residuals:
##      10      11      12 
##  0.5555 -1.1110  0.5555 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        18.6836     2.0785   8.989   0.0705 .
## Sampling_time_min  -1.3544     0.4811  -2.815   0.2173  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.361 on 1 degrees of freedom
## Multiple R-squared:  0.888,  Adjusted R-squared:  0.7759 
## F-statistic: 7.926 on 1 and 1 DF,  p-value: 0.2173
dfslope[dfslope$ID=="4_G322D",]$slope=- 1.3544
dfslope[dfslope$ID=="4_G322D",]$R2= 0.888

a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="G322D" &df$Run==5,])
summary(a) #slope=-1.3334, R^2= 0.9876
## 
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype == 
##     "G322D" & df$Run == 5, ])
## 
## Residuals:
##      13      14      15 
##  0.1728 -0.3455  0.1728 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        15.7647     0.6464  24.389   0.0261 *
## Sampling_time_min  -1.3334     0.1496  -8.912   0.0711 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4232 on 1 degrees of freedom
## Multiple R-squared:  0.9876, Adjusted R-squared:  0.9751 
## F-statistic: 79.43 on 1 and 1 DF,  p-value: 0.07114
dfslope[dfslope$ID=="5_G322D",]$slope=- 1.3334
dfslope[dfslope$ID=="5_G322D",]$R2= 0.9876

a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="G322D" &df$Run==6,])
summary(a) #slope=-1.3502, R^2=0.9594
## 
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype == 
##     "G322D" & df$Run == 6, ])
## 
## Residuals:
##      16      17      18 
##  0.3208 -0.6415  0.3208 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        16.3173     1.2002   13.60   0.0467 *
## Sampling_time_min  -1.3502     0.2778   -4.86   0.1292  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7857 on 1 degrees of freedom
## Multiple R-squared:  0.9594, Adjusted R-squared:  0.9188 
## F-statistic: 23.62 on 1 and 1 DF,  p-value: 0.1292
dfslope[dfslope$ID=="6_G322D",]$slope=- 1.3502
dfslope[dfslope$ID=="6_G322D",]$R2= 0.9594

a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="G322D" &df$Run==7,])
summary(a) #slope=-1.2309,  R^2=0.9919
## 
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype == 
##     "G322D" & df$Run == 7, ])
## 
## Residuals:
##      19      20      21 
##  0.1288 -0.2575  0.1288 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        16.0207     0.4818   33.25   0.0191 *
## Sampling_time_min  -1.2309     0.1115  -11.04   0.0575 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3154 on 1 degrees of freedom
## Multiple R-squared:  0.9919, Adjusted R-squared:  0.9837 
## F-statistic: 121.8 on 1 and 1 DF,  p-value: 0.05752
dfslope[dfslope$ID=="7_G322D",]$slope=- 1.2309
dfslope[dfslope$ID=="7_G322D",]$R2= 0.9919

Small plaque mutant F(S427*)

# Each model represents an individual experimental run

a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="S427*" &df$Run==1,])
summary(a) #slope-1.2174, R^2= 0.9414
## 
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype == 
##     "S427*" & df$Run == 1, ])
## 
## Residuals:
##      22      23      24 
##  0.3508 -0.7016  0.3508 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        17.1233     1.3125  13.046   0.0487 *
## Sampling_time_min  -1.2174     0.3038  -4.007   0.1557  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8592 on 1 degrees of freedom
## Multiple R-squared:  0.9414, Adjusted R-squared:  0.8828 
## F-statistic: 16.06 on 1 and 1 DF,  p-value: 0.1557
dfslope[dfslope$ID=="1_S427*",]$slope=- 1.2174
dfslope[dfslope$ID=="1_S427*",]$R2= 0.9414

a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="S427*" &df$Run==2,])
summary(a) #slope=-1.03485 ,  R^2=0.9986
## 
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype == 
##     "S427*" & df$Run == 2, ])
## 
## Residuals:
##       25       26       27 
##  0.04534 -0.09069  0.04534 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       17.61483    0.16966  103.83  0.00613 **
## Sampling_time_min -1.03485    0.03927  -26.35  0.02415 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1111 on 1 degrees of freedom
## Multiple R-squared:  0.9986, Adjusted R-squared:  0.9971 
## F-statistic: 694.5 on 1 and 1 DF,  p-value: 0.02415
dfslope[dfslope$ID=="2_S427*",]$slope=- 1.03485
dfslope[dfslope$ID=="2_S427*",]$R2= 0.9986

a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="S427*" &df$Run==3,])
summary(a) #slope=-1.3484, R^2=0.9893
## 
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype == 
##     "S427*" & df$Run == 3, ])
## 
## Residuals:
##      28      29      30 
##  0.1621 -0.3242  0.1621 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        18.0550     0.6066  29.765   0.0214 *
## Sampling_time_min  -1.3484     0.1404  -9.604   0.0660 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3971 on 1 degrees of freedom
## Multiple R-squared:  0.9893, Adjusted R-squared:  0.9786 
## F-statistic: 92.24 on 1 and 1 DF,  p-value: 0.06605
dfslope[dfslope$ID=="3_S427*",]$slope=- 1.3484
dfslope[dfslope$ID=="3_S427*",]$R2= 0.9893

a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="S427*" &df$Run==4,])
summary(a) #slope=-0.9737, R^2= 0.9814
## 
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype == 
##     "S427*" & df$Run == 4, ])
## 
## Residuals:
##      31      32      33 
##  0.1548 -0.3095  0.1548 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        15.0635     0.5791  26.011   0.0245 *
## Sampling_time_min  -0.9737     0.1340  -7.265   0.0871 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3791 on 1 degrees of freedom
## Multiple R-squared:  0.9814, Adjusted R-squared:  0.9628 
## F-statistic: 52.77 on 1 and 1 DF,  p-value: 0.08709
dfslope[dfslope$ID=="4_S427*",]$slope=- 0.9737
dfslope[dfslope$ID=="4_S427*",]$R2= 0.9814

a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="S427*" &df$Run==5,])
summary(a) #slope=-1.3133, R^2= 0.9513
## 
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype == 
##     "S427*" & df$Run == 5, ])
## 
## Residuals:
##      34      35      36 
##  0.3433 -0.6865  0.3433 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        16.5109     1.2844  12.855   0.0494 *
## Sampling_time_min  -1.3133     0.2973  -4.418   0.1417  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8408 on 1 degrees of freedom
## Multiple R-squared:  0.9513, Adjusted R-squared:  0.9025 
## F-statistic: 19.52 on 1 and 1 DF,  p-value: 0.1417
dfslope[dfslope$ID=="5_S427*",]$slope=- 1.3133
dfslope[dfslope$ID=="5_S427*",]$R2= 0.9513

a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="S427*" &df$Run==7,])
summary(a) #slope=-1.2369,  R^2=0.9416
## 
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype == 
##     "S427*" & df$Run == 7, ])
## 
## Residuals:
##      37      38      39 
##  0.3557 -0.7115  0.3557 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        15.5624     1.3311  11.692   0.0543 .
## Sampling_time_min  -1.2369     0.3081  -4.015   0.1554  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8714 on 1 degrees of freedom
## Multiple R-squared:  0.9416, Adjusted R-squared:  0.8832 
## F-statistic: 16.12 on 1 and 1 DF,  p-value: 0.1554
dfslope[dfslope$ID=="7_S427*",]$slope=- 1.2369
dfslope[dfslope$ID=="7_S427*",]$R2= 0.9416

Large plaque mutant F(T101A)

# Each model represents an individual experimental run

a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="T101A" &df$Run==5,])
summary(a) #slope=-0.28577, R^2= 0.9814
## 
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype == 
##     "T101A" & df$Run == 5, ])
## 
## Residuals:
##       40       41       42 
##  0.04540 -0.09081  0.04540 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       14.66372    0.16989  86.314  0.00738 **
## Sampling_time_min -0.28577    0.03932  -7.267  0.08705 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1112 on 1 degrees of freedom
## Multiple R-squared:  0.9814, Adjusted R-squared:  0.9628 
## F-statistic: 52.82 on 1 and 1 DF,  p-value: 0.08705
dfslope[dfslope$ID=="5_T101A",]$slope=- 0.28577
dfslope[dfslope$ID=="5_T101A",]$R2= 0.9814

a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="T101A" &df$Run==6,])
summary(a) #slope=-0.34008, R^2= 0.993
## 
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype == 
##     "T101A" & df$Run == 6, ])
## 
## Residuals:
##       43       44       45 
## -0.03298  0.06595 -0.03298 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       14.94736    0.12339  121.14  0.00526 **
## Sampling_time_min -0.34008    0.02856  -11.91  0.05334 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08078 on 1 degrees of freedom
## Multiple R-squared:  0.993,  Adjusted R-squared:  0.986 
## F-statistic: 141.8 on 1 and 1 DF,  p-value: 0.05334
dfslope[dfslope$ID=="6_T101A",]$slope=- 0.34008
dfslope[dfslope$ID=="6_T101A",]$R2= 0.993

a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="T101A" &df$Run==7,])
summary(a) #slope=-0.20034, R^2= 0.9875
## 
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype == 
##     "T101A" & df$Run == 7, ])
## 
## Residuals:
##       46       47       48 
## -0.02607  0.05214 -0.02607 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       14.68695    0.09755 150.562  0.00423 **
## Sampling_time_min -0.20034    0.02258  -8.873  0.07144 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06386 on 1 degrees of freedom
## Multiple R-squared:  0.9875, Adjusted R-squared:  0.9749 
## F-statistic: 78.74 on 1 and 1 DF,  p-value: 0.07144
dfslope[dfslope$ID=="7_T101A",]$slope=- 0.20034
dfslope[dfslope$ID=="7_T101A",]$R2= 0.9875 

Determine minimum and maximum R^2 value

summary(dfslope)
##  Run    Genotype     CFU_ml               ID                slope        
##  1:3   G322D:7   Min.   :121000000   Length:23          Min.   :-1.3753  
##  2:3   S427*:6   1st Qu.:196000000   Class :character   1st Qu.:-1.3190  
##  3:3   T101A:3   Median :221000000   Mode  :character   Median :-1.0299  
##  4:3   WT   :7   Mean   :235217391                      Mean   :-1.0036  
##  5:4             3rd Qu.:267000000                      3rd Qu.:-0.8821  
##  6:3             Max.   :391000000                      Max.   :-0.2003  
##  7:4                                                                     
##        R2        
##  Min.   :0.8880  
##  1st Qu.:0.9469  
##  Median :0.9814  
##  Mean   :0.9679  
##  3rd Qu.:0.9925  
##  Max.   :1.0000  
## 
# min: 0.89
# max: 1.00

Calculate adsorption constants for each genotype and experimental run

Formula for calculating k: k (ml^-1 min^-1)= -slope/Bacteria concentration (CFU_ml)

# Calculate adsorption rate constant k
dfslope$k=-(dfslope$slope)/(dfslope$CFU_ml)

Export dfslopes dataframe with the analysed slopes and R^2 values

#write.csv(dfslope,"experiment_3_slope_analysis.csv")

Calculate mean and se of adsorption rate constant

## WT
mean(dfslope[dfslope$Genotype=="WT",]$k) #4.146119e-09
## [1] 4.146119e-09
std.error(dfslope[dfslope$Genotype=="WT",]$k) #5.982199e-10
## [1] 5.982199e-10
## T101A
mean(dfslope[dfslope$Genotype=="T101A",]$k) # 9.874041e-10
## [1] 9.874041e-10
std.error(dfslope[dfslope$Genotype=="T101A",]$k) #6.043767e-11
## [1] 6.043767e-11
## G322D
mean(dfslope[dfslope$Genotype=="G322D",]$k) # 6.102084e-09
## [1] 6.102084e-09
std.error(dfslope[dfslope$Genotype=="G322D",]$k) #9.289692e-10
## [1] 9.289692e-10
## S427*
mean(dfslope[dfslope$Genotype=="S427*",]$k) #5.931108e-09
## [1] 5.931108e-09
std.error(dfslope[dfslope$Genotype=="S427*",]$k) #5.968958e-10
## [1] 5.968958e-10

Test for statistical significant differences in adsorption constants between genotypes

Test for normality and heterogeneity of variances

# Test for normal distribution
shapiro.test(dfslope$k) # passed: W = 0.9632, p-value = 0.5309
## 
##  Shapiro-Wilk normality test
## 
## data:  dfslope$k
## W = 0.9632, p-value = 0.5309
# Homogeneity of variances
leveneTest(k ~ Genotype, data = dfslope) # passed:  group DF= 3, group sample= 19, F-value= 1.2369  p=0.324
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  3  1.2369  0.324
##       19

Linear mixed model used to test for differences between genotypes

# Linear mixed effects model with Genotype as fixed effect and run as random effect

## Full model
m1<- lmer(k~Genotype +(1|Run) ,data= dfslope )
## Null model
m2<- lmer(k~1 +(1|Run),data=dfslope)
  
# Model comparison using anova to test if including genotype into the model significantly contributes to explaining adsorption constant variation
anova(m1,m2)
## refitting model(s) with ML (instead of REML)
## Data: dfslope
## Models:
## m2: k ~ 1 + (1 | Run)
## m1: k ~ Genotype + (1 | Run)
##    npar     AIC     BIC logLik deviance  Chisq Df Pr(>Chisq)    
## m2    3 -847.21 -843.80 426.60  -853.21                         
## m1    6 -870.97 -864.16 441.49  -882.97 29.764  3  1.548e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Results of Anova Analysis of Variance Table Models: m2: k ~ 1 + (1 | Run) m1: k ~ Genotype + (1 | Run) npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
m2 3 -847.21 -843.80 426.60 -853.21
m1 6 -870.97 -864.16 441.49 -882.97 29.764 3 1.548e-06 ***

Summary of model

summary(m1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: k ~ Genotype + (1 | Run)
##    Data: dfslope
## 
## REML criterion at convergence: -718.6
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.23170 -0.50637  0.04028  0.48679  1.95181 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. 
##  Run      (Intercept) 2.822e-18 1.680e-09
##  Residual             6.623e-19 8.138e-10
## Number of obs: 23, groups:  Run, 7
## 
## Fixed effects:
##                 Estimate Std. Error t value
## (Intercept)    6.102e-09  7.055e-10   8.649
## GenotypeS427* -4.454e-10  4.586e-10  -0.971
## GenotypeT101A -4.292e-09  5.983e-10  -7.173
## GenotypeWT    -1.956e-09  4.350e-10  -4.496
## 
## Correlation of Fixed Effects:
##             (Intr) GS427* GT101A
## GentypS427* -0.292              
## GentypT101A -0.224  0.313       
## GenotypeWT  -0.308  0.474  0.364

Results of Model m1 summary Linear mixed model fit by REML [‘lmerMod’] Formula: k ~ Genotype + (1 | Run) Data: dfslope

REML criterion at convergence: -718.6

Scaled residuals: Min 1Q Median 3Q Max -1.23170 -0.50637 0.04028 0.48679 1.95181

Random effects: Groups Name Variance Std.Dev. Run (Intercept) 2.822e-18 1.680e-09 Residual 6.623e-19 8.138e-10 Number of obs: 23, groups: Run, 7

Fixed effects: Estimate Std. Error t value (Intercept) 6.102e-09 7.055e-10 8.649 GenotypeS427* -4.454e-10 4.586e-10 -0.971 GenotypeT101A -4.292e-09 5.983e-10 -7.173 GenotypeWT -1.956e-09 4.350e-10 -4.496

Correlation of Fixed Effects: (Intr) GS427* GT101A GentypS427* -0.292
GentypT101A -0.224 0.313
GenotypeWT -0.308 0.474 0.364

Multiple comparison to compare between genotypes

# Using Tukey-HSD as Post-Hoc test for mutliple comparison
summary(glht(m1,linfct=mcp(Genotype="Tukey")),test=adjusted("holm"))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: lmer(formula = k ~ Genotype + (1 | Run), data = dfslope)
## 
## Linear Hypotheses:
##                      Estimate Std. Error z value Pr(>|z|)    
## S427* - G322D == 0 -4.454e-10  4.586e-10  -0.971 0.331457    
## T101A - G322D == 0 -4.292e-09  5.983e-10  -7.173 4.40e-12 ***
## WT - G322D == 0    -1.956e-09  4.350e-10  -4.496 2.76e-05 ***
## T101A - S427* == 0 -3.846e-09  6.296e-10  -6.109 5.02e-09 ***
## WT - S427* == 0    -1.511e-09  4.586e-10  -3.294 0.001977 ** 
## WT - T101A == 0     2.336e-09  5.983e-10   3.904 0.000284 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- holm method)

Results of Post-Hoc Multiple comparison testing Simultaneous Tests for General Linear Hypotheses

Multiple Comparisons of Means: Tukey Contrasts

Fit: lmer(formula = k ~ Genotype + (1 | Run), data = dfslope)

Linear Hypotheses: Estimate Std. Error z value Pr(>|z|)
S427* - G322D == 0 -4.454e-10 4.586e-10 -0.971 0.331457
T101A - G322D == 0 -4.292e-09 5.983e-10 -7.173 4.40e-12 WT - G322D == 0 -1.956e-09 4.350e-10 -4.496 2.76e-05 T101A - S427* == 0 -3.846e-09 6.296e-10 -6.109 5.02e-09 WT - S427 == 0 -1.511e-09 4.586e-10 -3.294 0.001977 WT - T101A == 0 2.336e-09 5.983e-10 3.904 0.000284 ***

Signif. codes: 0 ‘’ 0.001 ‘’ 0.01 ‘’ 0.05 ‘.’ 0.1 ‘ ’ 1 (Adjusted p values reported – holm method)

Model validation (Fig. S7)

#output_file <- "adsorption_statistics_model_checking.jpeg"
#jpeg(filename = output_file, width = 8, height = 4, units = "in", res = 600)

## Plot layout
par(mfrow=c(1,2))

## qqplot
qqPlot(residuals(m1))
##  4 16 
##  2  6
## residuals vs fitted values plot
plot(residuals(m1)~fitted(m1))
abline(a=0, b=0, col="red")

#dev.off()

Visualize adsorption constant k using a boxplot

(Figure Fig. 3A)

# Rearrange order of genotypes in ascending order
dfslope$Genotype <- factor(dfslope$Genotype , levels=c("T101A", "WT","S427*", "G322D"))

# visualize results of adsorption rate constant
a=ggplot(dfslope,aes(Genotype,k,fill=as.factor(Genotype)))+
  geom_boxplot(alpha=0.8)+ #boxplot
  geom_point()+ # individual adsorption constants as dots
  annotate(geom="text", x=1.2, y=1.15e-09, label="a",
              color="black", size=5)+ # significance letter F(T101A)
    annotate(geom="text", x=2.2, y=5.25e-09, label="b",
              color="black", size=5)+ # significance letter Ancestor
    annotate(geom="text", x=3.2, y=6.8e-09, label="c",
              color="black", size=5)+ # significance letter F(S427*)
    annotate(geom="text", x=4.2, y=7.1e-09, label="c",
              color="black", size=5)+ # significance letter F(G322D)
  scale_y_continuous(trans="log10", 
                     labels=trans_format("log10", math_format(10^.x)),
                     breaks=c(1e-09,1e-08))+ # log transformation of y-axis
  scale_x_discrete(breaks=c("T101A","WT","S427*","G322D"),
                   labels=c("F(T101A)","Ancestor","F(S427*)","F(G322D)"))+ # change label of mutants on the x-axis
    scale_fill_manual(values=c("#ae017e","#29485d","#ef7c00","#c6d325"))+ # color setting
  labs(x = "Genotype", 
       y = expression(paste("Adsorption constant (ml"^"-1","min"^"-1 ",")")))+ #labels of x and y axis 
  theme_bw()+ # plot layout
  theme(legend.position = "none")+ # no legend
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=14)) # font size
a

# Save plot at 600 dpi resolution
#ggsave("adsorption.jpeg", plot = a, dpi = 600, width = 8, height = 6)

Experiment 4: Image analysis of plaque assays

Code used to generate Fig. 3B and Fig. S4 in the manuscript

READ ME Specification of columns in data set

Load data set

df=read.csv("experiment_4_plaque_image_analysis.csv")

# df consists of 52687 observations and 10 variables

Checking data set

str(df) # structure of columns
## 'data.frame':    52687 obs. of  10 variables:
##  $ Block       : chr  "0" "0" "0" "0" ...
##  $ Phenotype   : chr  "LP" "LP" "LP" "LP" ...
##  $ Line        : chr  "WT" "WT" "WT" "WT" ...
##  $ Image       : chr  "A" "A" "A" "A" ...
##  $ Colony      : int  8 8 8 8 8 8 8 8 8 8 ...
##  $ Area        : num  0.555 0.555 0.555 0.555 0.555 ...
##  $ Distance    : num  1 1.41 2 2.24 2.83 ...
##  $ Intensity   : num  100 97.2 95.5 94.6 94 ...
##  $ Intensity_SD: num  3.1 1.99 1.81 1.65 1.4 ...
##  $ ID          : chr  "0_LP_WT" "0_LP_WT" "0_LP_WT" "0_LP_WT" ...
summary(df) #summary of columns
##     Block            Phenotype             Line              Image          
##  Length:52687       Length:52687       Length:52687       Length:52687      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##      Colony           Area             Distance        Intensity     
##  Min.   :  3.0   Min.   :0.001384   Min.   : 1.000   Min.   : 60.00  
##  1st Qu.: 19.0   1st Qu.:0.071833   1st Qu.: 5.831   1st Qu.: 76.75  
##  Median : 43.0   Median :0.294716   Median :10.296   Median : 83.00  
##  Mean   :102.5   Mean   :0.334197   Mean   :12.869   Mean   : 85.62  
##  3rd Qu.:117.0   3rd Qu.:0.534672   3rd Qu.:19.105   3rd Qu.: 92.50  
##  Max.   :621.0   Max.   :1.524953   Max.   :45.880   Max.   :154.25  
##   Intensity_SD          ID           
##  Min.   : 0.0000   Length:52687      
##  1st Qu.: 0.9428   Class :character  
##  Median : 1.7916   Mode  :character  
##  Mean   : 2.1207                     
##  3rd Qu.: 2.8986                     
##  Max.   :46.1976

Visualize plaque area

Code used to generate Fig. 3B

Each colony id corresponds to one measured plaques. Multiple intensity and distance values exist for each colony id but only one area value. To visualize plaque areas, df has to be summarized according to area.

Summarize data to have only one area value per plaque id

# Make a copy of the df to work with for the area analysis
df1=df 

# Subset df1 used for area analysis by removing columns "Distance", "Intensity", and "Intensity_SD"
df1=df1[,-c(7:9)]

# Create unique identifier per colony & image
df1$ColID=paste(df1$ID,df1$Colony,df1$Image,sep="_") 

# Summarize data to that only one area per colony id exists
df1=df1 %>%
  group_by(ColID) %>%
  summarise(across(c(Area),mean))

# Seperate unique identifier column "ColID" into individual columnse
df1=separate(df1,col=ColID,into=c("Block","Phenotype","Line","Colony", "Image"),sep="_")

# Convert Phenotype and Line into factors
df1$Phenotype=as.factor(df1$Phenotype)
df1$Line=as.factor(df1$Line)

# Create unique identifier "ID" combining Block, Phenotype, and Line
df1$ID=paste(df1$Block,df1$Phenotype,df1$Line,sep="_")
df1$ID=as.factor(df1$ID)

# Add a column with genotype information
df1$Genotype=NA
# Add genotypes
df1[df1$ID=="0_LP_WT",]$Genotype="WT"
df1[df1$ID=="1_SP_L1",]$Genotype="G322D"
df1[df1$ID=="1_SP_L4",]$Genotype="S427*"
df1[df1$ID=="A_LP_L3",]$Genotype="T101A"

# Convert Genotype into factor
df1$Genotype=as.factor(df1$Genotype)

Visualize plaque area results using a combined violin plot and boxplot

# Rearrange Genotypes based on plaque size in decenting order
df1$Genotype <- factor(df1$Genotype , levels=c("T101A", "WT","S427*", "G322D"))

# Visualize plaque area using a combined box and violin plot (Fig. 3B)
a=ggplot(df1,aes(Genotype,Area, fill=Genotype))+
  geom_violin(alpha=0.8)+ # violin plot
  geom_boxplot(width=0.1, fill="white", outlier.shape=NA)+ # boxplot
scale_y_continuous(trans="log10", 
                     labels=trans_format("log10", math_format(10^.x)),
                     breaks=c(1e-03,1e-02,1e-01,1e+00))+ # log scale with exponential y-ticks
  scale_x_discrete(breaks=c("T101A","WT","S427*","G322D"),
                   labels=c("F(T101A)","Ancestor","F(S427*)","F(G322D)"))+ # Change x labels 
    scale_fill_manual(values=c("#ae017e","#29485d","#ef7c00","#c6d325"))+ #setting colors of violin plot
  labs(x = "Genotype", 
       y = expression(paste("Plaque area (mm"^"2",")")))+ # labels of x and y axis
  theme_bw()+ # plot layout
  theme(legend.position = "none")+ # remove figure legend
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=14)) # set font size
a

# Save plot at high resolution resolution
# ggsave("plaque_area.jpeg", plot = a, dpi = 600, width = 8, height = 6)

Summary data for plaque area

# Summary of total number of plaques analyzed for each genotype
summary(df1)
##     Block           Phenotype Line        Colony             Image          
##  Length:870         LP:136    L1:303   Length:870         Length:870        
##  Class :character   SP:734    L3: 60   Class :character   Class :character  
##  Mode  :character             L4:431   Mode  :character   Mode  :character  
##                               WT: 76                                        
##                                                                             
##                                                                             
##       Area                ID       Genotype  
##  Min.   :0.001384   0_LP_WT: 76   T101A: 60  
##  1st Qu.:0.028572   1_SP_L1:303   WT   : 76  
##  Median :0.051452   1_SP_L4:431   S427*:431  
##  Mean   :0.112703   A_LP_L3: 60   G322D:303  
##  3rd Qu.:0.095175                            
##  Max.   :1.524953
# F(G322D): 303
# F(S427*): 431
# F(T101A): 60
# ancestor: 76

# Calculate mean and standard error for plaque area of each genotype
## Ancestor
mean(df1[df1$Genotype=="WT",]$Area) # 0.375
## [1] 0.3749175
std.error(df1[df1$Genotype=="WT",]$Area) # 0.021
## [1] 0.02140839
## F(T101A)
mean(df1[df1$Genotype=="T101A",]$Area) #0.518
## [1] 0.5184235
std.error(df1[df1$Genotype=="T101A",]$Area) # 0.039
## [1] 0.0391119
## F(G322D)
mean(df1[df1$Genotype=="G322D",]$Area) # 0.033
## [1] 0.03278611
std.error(df1[df1$Genotype=="G322D",]$Area) # 0.001
## [1] 0.001302967
## F(S427*)
mean(df1[df1$Genotype=="S427*",]$Area) #0.066
## [1] 0.06616785
std.error(df1[df1$Genotype=="S427*",]$Area) #0.002
## [1] 0.00189727

** Visualize plaque turbidity**

Code for supplementary figure Fig. S4

Calculate mean intensity and area

# Make a copy of df
df2=df

# Create unique identifier per colony & image
df2$ColID=paste(df2$ID,df2$Colony,df2$Image,sep="_") 

# Summarize data to that only one area per colony id exists
df2=df2 %>%
  group_by(ColID) %>%
  summarise(across(c(Area, Intensity),mean))

# Seperate unique identifier column "ColID" into individual columnse
df2=separate(df2,col=ColID,into=c("Block","Phenotype","Line","Colony", "Image"),sep="_")

# Convert Phenotype and Line into factors
df2$Phenotype=as.factor(df2$Phenotype)
df2$Line=as.factor(df2$Line)

# Create unique identifier "ID" combining Block, Phenotype, and Line
df2$ID=paste(df2$Block,df2$Phenotype,df2$Line,sep="_")
df2$ID=as.factor(df2$ID)

# Add a column with genotype information
df2$Genotype=NA
# Add genotypes
df2[df2$ID=="0_LP_WT",]$Genotype="Ancestor"
df2[df2$ID=="1_SP_L1",]$Genotype="F(G322D)"
df2[df2$ID=="1_SP_L4",]$Genotype="F(S427*)"
df2[df2$ID=="A_LP_L3",]$Genotype="F(T101A)"

# Convert Genotype into factor
df2$Genotype=as.factor(df2$Genotype)

Visualize plaque turbidity using a density plot (Fig.S4)

# Set levels of genotypes form highest to lowest turbidty
df2$Genotype <- factor(df2$Genotype , levels=c("F(G322D)", "F(S427*)","F(T101A)", "Ancestor"))

# Plot results using a density plot
x=ggplot(df2,aes(x=Area,y=Intensity,color=Genotype)) +
  geom_density_2d()+ # density plot with concentric areas
  scale_x_continuous(trans="log10")+ # log transform area axis
        scale_color_manual(values=c("#c6d325","#ef7c00","#ae017e","#29485d"))+ # set color palette
  labs(x = expression(paste(" Plaque area (mm"^"2 ",")")),
       y="Turbidity")+ # labels of x- and y-axis
  theme_bw()+ # plot layout
  theme(legend.position = "bottom")+ # legend position
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=14)) # font size
x

# Save image at high resolution as jpeg file
# ggsave("turbidity.jpeg", plot = x, dpi = 600, width = 8, height = 6)

Experiment 5: Free phages transferred during 30-min transfer experiment

Code used to generate Fig. 4D in the manuscript

READ ME Specification of columns in data set

-Treatment: Treatment Filtration: Samples were sterile filtered using a 0.2 µm filter to remove bateria and phages still inside bacterial hosts

-Transfer: Transfer identity, Transfers T1, T2, T3, and T4 were 30-minute transfers after which 100 µl of phage-bacteria mix was passaged onto fresh, susceptible ancestral bacteria cells

-Line: Selection line. Genotype used to start the transfer experiment with (4 in total) - WT: ancestral wild type phage genotype, large plaque former - T101A: mutant T101A forming large plaques, evolved during 30-minute transfers in MS_002-exp_4

-CFU: colony-forming units of the ancestral E. coli C culture in mid-log phase at the moment of infection initiation

-CFU_dilution: plating dilution used to plate 100 µl of bacterial culture for CFU counting

-CFU_ml: colony-forming units in ml, calculated by CFUCFU_dilution10 (because 100 µl were plated)

-PFU_LP_1: plaque-forming units of the large-plaque phenotype for each sample of replicate plating 1

-dilution_1: plating dilution used for PFU counts, 100 µl of plating dilution was plated for plaque counting of replicate plating 1

-PFU_LP_2: plaque-forming units of the large-plaque phenotype for each sample of replicate plating 2

-dilution_2: plating dilution used for PFU counts, 100 µl of plating dilution was plated for plaque counting of replicate plating 2

-PFU_LP_3: plaque-forming units of the large-plaque phenotype for each sample of replicate plating 3

-dilution_3: plating dilution used for PFU counts, 100 µl of plating dilution was plated for plaque counting of replicate plating 3

-PFU_ml_LP1: plaque-forming units per ml of large plaque phenotype calculated as PFUdilution_110 (because 100 µl were plated) of replicate plating 1

-PFU_ml_LP2: plaque-forming units per ml of large plaque phenotype calculated as PFUdilution_110 (because 100 µl were plated) of replicate plating 2

-PFU_ml_LP3: plaque-forming units per ml of large plaque phenotype calculated as PFUdilution_110 (because 100 µl were plated) of replicate plating 3

-Median_PFU_ml: Median large plaque PFU per ml calculated based on phage concentrations of the three replicate platings

-Median_WT: Median of titering for the wild type taken from column “Median_PFU_ml” used to calculate the ration between free phages of mutant F(T101A) and the ancestor

Load data set

df=read.csv("experiment_5_30min_free_phages.csv")

# df has 24 observations and 18 variables

Checking data set

str(df) # structure of data columns
## 'data.frame':    24 obs. of  18 variables:
##  $ Replicate    : int  1 1 1 1 1 1 1 1 2 2 ...
##  $ Treatment    : chr  "Filtration" "Filtration" "Filtration" "Filtration" ...
##  $ Transfer     : chr  "T1" "T2" "T3" "T4" ...
##  $ Line         : chr  "T101A" "T101A" "T101A" "T101A" ...
##  $ CFU          : int  148 136 78 135 148 136 78 135 75 73 ...
##  $ CFU_dilution : num  1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 ...
##  $ CFU_ml       : num  1.48e+08 1.36e+08 7.80e+07 1.35e+08 1.48e+08 1.36e+08 7.80e+07 1.35e+08 7.50e+07 7.30e+07 ...
##  $ PFU_LP_1     : int  14 26 194 139 26 97 168 145 53 170 ...
##  $ dilution_1   : num  1e+06 1e+07 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+07 ...
##  $ PFU_LP_2     : int  70 28 23 95 60 96 15 78 49 113 ...
##  $ dilution_2   : num  1e+05 1e+07 1e+07 1e+06 1e+05 1e+06 1e+07 1e+06 1e+06 1e+07 ...
##  $ PFU_LP_3     : int  80 26 21 95 20 80 60 47 52 96 ...
##  $ dilution_3   : num  1e+05 1e+07 1e+07 1e+06 1e+05 1e+06 1e+06 1e+06 1e+06 1e+07 ...
##  $ PFU_ml1      : num  1.40e+08 2.60e+09 1.94e+09 1.39e+09 2.60e+08 ...
##  $ PFU_ml2      : num  7.0e+07 2.8e+09 2.3e+09 9.5e+08 6.0e+07 ...
##  $ PFU_ml3      : num  8.0e+07 2.6e+09 2.1e+09 9.5e+08 2.0e+07 8.0e+08 6.0e+08 4.7e+08 5.2e+08 9.6e+09 ...
##  $ Median_PFU_ml: num  8.0e+07 2.6e+09 2.1e+09 9.5e+08 6.0e+07 ...
##  $ Median_WT    : num  6.0e+07 9.6e+08 1.5e+09 7.8e+08 6.0e+07 9.6e+08 1.5e+09 7.8e+08 2.9e+08 7.3e+09 ...
summary(df) # summary of data columns
##    Replicate  Treatment           Transfer             Line          
##  Min.   :1   Length:24          Length:24          Length:24         
##  1st Qu.:1   Class :character   Class :character   Class :character  
##  Median :2   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :2                                                           
##  3rd Qu.:3                                                           
##  Max.   :3                                                           
##       CFU         CFU_dilution       CFU_ml             PFU_LP_1     
##  Min.   : 73.0   Min.   :1e+05   Min.   : 73000000   Min.   : 14.00  
##  1st Qu.: 96.0   1st Qu.:1e+05   1st Qu.: 96000000   1st Qu.: 25.25  
##  Median :135.5   Median :1e+05   Median :135500000   Median : 43.50  
##  Mean   :150.6   Mean   :1e+05   Mean   :150583333   Mean   : 72.12  
##  3rd Qu.:150.8   3rd Qu.:1e+05   3rd Qu.:150750000   3rd Qu.:109.00  
##  Max.   :354.0   Max.   :1e+05   Max.   :354000000   Max.   :194.00  
##    dilution_1           PFU_LP_2        dilution_2          PFU_LP_3     
##  Min.   :  1000000   Min.   : 15.00   Min.   :1.00e+05   Min.   : 15.00  
##  1st Qu.:  1000000   1st Qu.: 22.75   1st Qu.:1.00e+06   1st Qu.: 25.25  
##  Median : 10000000   Median : 45.50   Median :1.00e+07   Median : 45.00  
##  Mean   : 17125000   Mean   : 50.96   Mean   :1.78e+07   Mean   : 52.54  
##  3rd Qu.: 10000000   3rd Qu.: 75.00   3rd Qu.:1.00e+07   3rd Qu.: 80.00  
##  Max.   :100000000   Max.   :113.00   Max.   :1.00e+08   Max.   :128.00  
##    dilution_3           PFU_ml1             PFU_ml2         
##  Min.   :   100000   Min.   :1.400e+08   Min.   :6.000e+07  
##  1st Qu.:  1000000   1st Qu.:8.600e+08   1st Qu.:7.075e+08  
##  Median : 10000000   Median :2.270e+09   Median :2.550e+09  
##  Mean   : 17425000   Mean   :5.562e+09   Mean   :5.272e+09  
##  3rd Qu.: 10000000   3rd Qu.:8.100e+09   3rd Qu.:7.625e+09  
##  Max.   :100000000   Max.   :1.900e+10   Max.   :2.200e+10  
##     PFU_ml3          Median_PFU_ml         Median_WT        
##  Min.   :2.000e+07   Min.   :6.000e+07   Min.   :6.000e+07  
##  1st Qu.:5.075e+08   1st Qu.:7.150e+08   1st Qu.:6.575e+08  
##  Median :2.650e+09   Median :2.350e+09   Median :1.800e+09  
##  Mean   :5.344e+09   Mean   :5.366e+09   Mean   :3.560e+09  
##  3rd Qu.:9.375e+09   3rd Qu.:8.700e+09   3rd Qu.:5.425e+09  
##  Max.   :2.300e+10   Max.   :2.200e+10   Max.   :1.280e+10
# Turn line into a factor
df$Line=as.factor(df$Line)

Calculate ratio between WT and T101

df$ratio=df$Median_PFU_ml/df$Median_WT
# for WT, ratio should be 1, check if this is true
summary(df[df$Line=="WT",]$ratio)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       1       1       1       1       1       1

Calculate mean and st between ratios of T101A/WT for each transfer

# Remove all WT (ratio value=1)
df1=df[df$Line!="WT",]

# Create a column for ratio mean and se
df1$mean=NA
df1$st.error=NA

# Mean and standard error of all transfer T1 for three replicates
mean(df1[df1$Transfer=="T1",]$ratio) #1.51
## [1] 1.505914
std.error(df1[df1$Transfer=="T1",]$ratio) #0.14
## [1] 0.1445667
## add values to table
df1[df1$Transfer=="T1",]$mean=1.51
df1[df1$Transfer=="T1",]$st.error=0.14

# Mean and standard error of all transfer T2 for three replicates
mean(df1[df1$Transfer=="T2",]$ratio) #1.99
## [1] 1.991676
std.error(df1[df1$Transfer=="T2",]$ratio) #0.36
## [1] 0.3617051
## add values to table
df1[df1$Transfer=="T2",]$mean=1.99
df1[df1$Transfer=="T2",]$st.error=0.36

# Mean and standard error of all transfer T3 for three replicates
mean(df1[df1$Transfer=="T3",]$ratio) #1.84
## [1] 1.836898
std.error(df1[df1$Transfer=="T3",]$ratio) #0.22
## [1] 0.2184878
## add values to table
df1[df1$Transfer=="T3",]$mean=1.84
df1[df1$Transfer=="T3",]$st.error=0.22

# Mean and standard error of all transfer T4 for three replicates
mean(df1[df1$Transfer=="T4",]$ratio) #2.47
## [1] 2.477411
std.error(df1[df1$Transfer=="T4",]$ratio) #0.88
## [1] 0.8779285
## add values to table
df1[df1$Transfer=="T4",]$mean=2.47
df1[df1$Transfer=="T4",]$st.error=0.88

# Add transfer as individual number
df1$no_transfer=NA
df1[df1$Transfer=='T1',]$no_transfer=1
df1[df1$Transfer=='T2',]$no_transfer=2
df1[df1$Transfer=='T3',]$no_transfer=3
df1[df1$Transfer=='T4',]$no_transfer=4

# Visualize phage ratios compared to wild type in a bar plot (Fig. 4D)
a=ggplot(df1,aes(no_transfer, mean,fill=Line))+
  geom_bar(stat="identity", position=position_dodge())+ # bar chart vertical
  geom_errorbar(aes(ymin=mean-st.error, ymax=mean+st.error), width=.2,
                 position=position_dodge(.9))+ # add error bars
    scale_fill_manual(values=c("#ae017e"))+ # set colors of bars
  geom_hline(yintercept=1,linetype="dashed", color="black")+ # horizontal line at 1 (indicating ratio frequency of wild type )
  xlab("Transfer")+ # label x-axis
    ylab("Relative free phage particles after 30 mins")+ # label y-axis
  theme_bw()+ # plot layout
  theme(legend.position = "none")+
    theme(axis.text=element_text(size=12),
        axis.title=element_text(size=14)) # remove legend and set font size
a

# Save plot at high resolution (Fig 4 D)
#ggsave("free_phages.jpeg", plot = a, dpi = 600, width = 8, height = 6)

Experiment 6: Decay analysis

Code used to generate Fig. 5B and Fig. S6 in the manuscript

READ ME Specification of columns in data set

Load data set

df=read.csv("experiment_6_decay_analysis.csv")

# df contains 40 observations of 8 variables

Checking data set

str(df) # structure of columns
## 'data.frame':    40 obs. of  8 variables:
##  $ Sampling_time_min: int  30 30 30 30 60 60 60 60 90 90 ...
##  $ Genotype         : chr  "WT" "G322D" "S427*" "T101A" ...
##  $ CFU              : int  108 108 108 108 108 108 108 108 108 108 ...
##  $ Dilution_CFU     : num  1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 ...
##  $ CFU_ml           : num  1.08e+08 1.08e+08 1.08e+08 1.08e+08 1.08e+08 1.08e+08 1.08e+08 1.08e+08 1.08e+08 1.08e+08 ...
##  $ PFU              : int  62 72 245 101 81 128 138 48 40 147 ...
##  $ Dilution_PFU     : num  1e+07 1e+07 1e+07 1e+06 1e+08 1e+08 1e+08 1e+08 1e+08 1e+08 ...
##  $ PFU_ml           : num  6.20e+09 7.20e+09 2.45e+10 1.01e+09 8.10e+10 ...
summary(df) #summary of columns
##  Sampling_time_min   Genotype              CFU       Dilution_CFU  
##  Min.   : 30       Length:40          Min.   :108   Min.   :1e+05  
##  1st Qu.: 90       Class :character   1st Qu.:108   1st Qu.:1e+05  
##  Median :165       Mode  :character   Median :108   Median :1e+05  
##  Mean   :165                          Mean   :108   Mean   :1e+05  
##  3rd Qu.:240                          3rd Qu.:108   3rd Qu.:1e+05  
##  Max.   :300                          Max.   :108   Max.   :1e+05  
##      CFU_ml              PFU          Dilution_PFU           PFU_ml         
##  Min.   :1.08e+08   Min.   : 15.00   Min.   :  1000000   Min.   :1.010e+09  
##  1st Qu.:1.08e+08   1st Qu.: 48.00   1st Qu.: 10000000   1st Qu.:5.975e+09  
##  Median :1.08e+08   Median : 83.50   Median :100000000   Median :4.400e+10  
##  Mean   :1.08e+08   Mean   : 97.25   Mean   : 65350000   Mean   :6.710e+10  
##  3rd Qu.:1.08e+08   3rd Qu.:139.25   3rd Qu.:100000000   3rd Qu.:1.190e+11  
##  Max.   :1.08e+08   Max.   :245.00   Max.   :100000000   Max.   :2.230e+11
# Convert genotype into factor
df$Genotype=as.factor(df$Genotype)

Add plaque phenotype column to the df

# Add plaque phenotype column to the df frame
df$Phenotype=NA
df[df$Genotype=="WT",]$Phenotype="LP"
df[df$Genotype=="T101A",]$Phenotype="LP"
df[df$Genotype=="G322D",]$Phenotype="SP"
df[df$Genotype=="S427*",]$Phenotype="SP"

# Convert phenotype into a factor
df$Phenotype=as.factor(df$Phenotype)

Relable Genotypes to clarify that all mutations are found in protein F in a new column called “Label” for plotting

# Add Genotypes as Labels including the protein in which mutation occurs
df$Label=NA
df[df$Genotype=="G322D",]$Label="F(G322D)"
df[df$Genotype=="S427*",]$Label="F(S427*)"
df[df$Genotype=="T101A",]$Label="F(T101A)"
df[df$Genotype=="WT",]$Label="Ancestor"

# Convert Label into factor
df$Label=as.factor(df$Label)

** Visualize phage population dynamics during long infection time**

(Supplementary Figure S6)

Visualization using a dotplot

# Rearrange Label to the corresponding color code
df$Label <- factor(df$Label , levels=c("F(G322D)", "F(S427*)","F(T101A)", "Ancestor"))

# Plotting population dynamics throughout infection period
x=ggplot(df,aes(Sampling_time_min,PFU_ml,group=Label, color=Label))+
  geom_point(size=3)+ # dotplot
  geom_smooth(method="loess", se=F, linetype="dashed")+ #model fit loess
 scale_y_continuous(trans="log10",
                    labels=trans_format("log10", math_format(10^.x)),
                     breaks=c(1e+09,1e+10,1e+11) )+ #y-axis log-transformation  
  scale_color_manual(values=c("#c6d325","#ef7c00","#ae017e","#252525"))+ # setting colors
  xlab("Infection period (min)")+ # label x-axis
  ylab("Infectious phage particles per ml")+ # label y-axis
  theme_bw()+ # plot layout
  theme(legend.position = "bottom")+ # legend position
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=14)) # font size
x
## `geom_smooth()` using formula = 'y ~ x'

# Save plot as high resolution jpeg file
# ggsave("supplementary_pop_dyn.jpeg", plot = x, dpi = 600, width = 8, height = 6)

Calculate and visualize phage decay

(Figure Fig 5B)

Calculate ratio decay at individual time step / productivity peak

# Determine peak titer for each genotype and the time point of the peak
df$Peak=NA
df[df$Genotype=="WT",]$Peak=8.10e+10 # peak at 60 min p.i.
df[df$Genotype=="T101A",]$Peak=4.80e+10 #peak at 60 min p.i
df[df$Genotype=="G322D",]$Peak=1.83e+11 # peak at 120 min p.i.
df[df$Genotype=="S427*",]$Peak=2.23e+11 # peak at 120 min p.i.

# Calculate ratio between each time step and peak
df$Ratio=df$PFU_ml/df$Peak

# Create unique identifier combining sampling time and genotype
df$ID=paste(df$Sampling_time_min,df$Genotype,sep="_")

# Plot growth and decay ratios using a dot plot
ggplot(df,aes(Sampling_time_min,Ratio,group=Genotype,color=Genotype))+
  geom_point(size=3)+ # dot plot
  scale_color_manual(values=c("#c6d325","#ef7c00","#ae017e","#252525"))+ # set color palette 
  xlab("Infection period (min)")+ # label x-axis
  ylab("Ratio compared to titer peak")+ # label y-axis
  theme_bw()+ # plot layout
  theme(legend.position = "bottom") # legend position

Prepare dataset for model fititng

# Remove time points between start and peak phage titers to only capture decay period
data1=df[df$Sampling_time_min!=30,]
data1=data1[data1$ID!="60_G322D",]
data1=data1[data1$ID!="60_S427*",]
data1=data1[data1$ID!="90_G322D",]
data1=data1[data1$ID!="90_S427*",]

# Plot ratios during the decay period
ggplot(data1[data1$Sampling_time_min<310,],aes(Sampling_time_min,Ratio,group=Genotype,color=Genotype))+
  geom_point(size=2.5)+ # dot plot
  scale_color_manual(values=c("#c6d325","#ef7c00","#ae017e","#252525"))+ # set color palette
  xlab("Infection period (min)")+ # label x-axis
  ylab("Remaining fraction of infectious phage particles")+ # label y-axis
  theme_bw()+ # plot layout
  theme(legend.position = "bottom") # legend position

# ==> Phage decay seems to follow an exponential decay function 

Fitting decay curve to ancestor

# Plot WT decay using a dotplot
ggplot(data1[data1$Genotype=="WT",], aes(Sampling_time_min,Ratio))+
  geom_point()

# x and y vector extracted from the WT dataset for model fitting
x1=data1[data1$Genotype=="WT",]$Sampling_time_min
y1=data1[data1$Genotype=="WT",]$Ratio

# Compare Linear model with exponential decay model
## Linear model
a1=lm(y1~x1)

## Exponential model
### Select appropriate starting values: Select an approximate $\theta$, since theta must be lower than min(y), and greater than zero
theta.0 <- min(y1) * 0.5  

### Estimate the rest parameters using a linear model
model.0 <- lm(log(y1 - theta.0) ~ x1)  
alpha.0 <- exp(coef(model.0)[1])
beta.0 <- coef(model.0)[2]

### Starting parameters
start<-list(alpha = alpha.0, beta = beta.0)

a2<- nls(y1 ~ alpha * exp(beta * x1) ,  start = start)

## Summary list AIC and BIC for model comparison
AIC(a1,a2)
##    df         AIC
## a1  3   0.3392432
## a2  3 -18.7277255
# df         AIC
#a1  3   0.3392432
#a2  3 -18.7277255

BIC(a1,a2)
##    df         BIC
## a1  3   0.9309169
## a2  3 -18.1360518
# df         BIC
#a1  3   0.9309169
#a2  3 -18.1360518

# ==> model a2 is best fit

## Plot predicted and measured values
plot(x1,y1)
lines(x1, predict(a2, list(x = x1)), col = 'skyblue', lwd = 3)

## Get parameter estimates
summary(a2)
## 
## Formula: y1 ~ alpha * exp(beta * x1)
## 
## Parameters:
##        Estimate Std. Error t value Pr(>|t|)    
## alpha  3.483344   0.708369   4.917 0.001719 ** 
## beta  -0.021112   0.002708  -7.797 0.000107 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06946 on 7 degrees of freedom
## 
## Number of iterations to convergence: 5 
## Achieved convergence tolerance: 2.277e-06
# alpha; 3.5, beta -0.021

# Model checking
## creat an environment in which the model checking plots are saved at high resolution
#output_file <- "decay_WT_fitting.jpeg"
#jpeg(filename = output_file, width = 8, height = 4, units = "in", res = 600)

# setting plotting environment
par(mfrow=c(1,2))
# qqplot
qqPlot(residuals(a2))
## [1] 4 3
### Normality of residuals
plot(residuals(a2)~fitted(a2))
abline(a=0, b=0, col="red")

#dev.off()

Fitting decay curve to F(T101A)

# Visualize decay period using a dotplot
ggplot(data1[data1$Genotype=="T101A",], aes(Sampling_time_min,Ratio))+
  geom_point()

# x and y vector extracted from the WT dataset for model fitting
x2=data1[data1$Genotype=="T101A",]$Sampling_time_min
y2=data1[data1$Genotype=="T101A",]$Ratio

# Model selection
## Linear model
b1=lm(y2~x2)

## Exponential decay model
###Select appropriate starting values; Select an approximate $\theta$, since theta must be lower than min(y), and greater than zero
theta.0 <- min(y2) * 0.5  

### Estimate the rest parameters using a linear model
model.0 <- lm(log(y2 - theta.0) ~ x2)  
alpha.0 <- exp(coef(model.0)[1])
beta.0 <- coef(model.0)[2]

### Starting parameters
start=list(alpha = alpha.0, beta = beta.0)

b2 <- nls(y2 ~ alpha * exp(beta * x2),  start = start)

### Select model based on lower AIC and BIC value
## Combine all AICs and BICs
AIC(b1,b2)
##    df          AIC
## b1  3   0.08641326
## b2  3 -12.04236649
#   df          AIC
# b1  3   0.08641326
# b2  3 -12.04236649

BIC(b1,b2)
##    df        BIC
## b1  3   0.678087
## b2  3 -11.450693
#   df        BIC
# b1  3   0.678087
# b2  3 -11.450693

# ==> Select model b2 due to lower AIC and BIC

# Plot fitted curve and data
plot(x2,y2)
lines(x2, predict(b2, list(x = x2)), col = 'skyblue', lwd = 3)

summary(b2)
## 
## Formula: y2 ~ alpha * exp(beta * x2)
## 
## Parameters:
##        Estimate Std. Error t value Pr(>|t|)   
## alpha  3.078747   0.904802   3.403  0.01140 * 
## beta  -0.019946   0.003831  -5.206  0.00124 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1007 on 7 degrees of freedom
## 
## Number of iterations to convergence: 9 
## Achieved convergence tolerance: 7.025e-06
# alpha: 3.08, beta= -0.0199

# Model checking for b3
## Create an environment in which the model validation plots are safed at high resolution
#output_file <- "decay_T101A_fitting.jpeg"
#jpeg(filename = output_file, width = 8, height = 4, units = "in", res = 600)

## Set up plot window
par(mfrow=c(1,2))
## QQPlot
qqPlot(residuals(b2))
## [1] 2 6
## Residuals vs fitted values
plot(residuals(b2)~fitted(b2))
abline(a=0, b=0, col="red")

#dev.off()

Fitting decay curve to F(G322D)

# Visualize decay using a dotplot
ggplot(data1[data1$Genotype=="G322D",], aes(Sampling_time_min,Ratio))+
  geom_point()

# Remove outlier t210
x3=data1[data1$Genotype=="G322D" & data1$Sampling_time_min!=210,]$Sampling_time_min
y3=data1[data1$Genotype=="G322D" & data1$Sampling_time_min!=210,]$Ratio

# Model comparison  between linear and exponential decay model
## Linear model
c1=lm(y3~x3)

## exponential model
### Select appropriate starting values
### Select an approximate $\theta$, since theta must be lower than min(y), and greater than zero
theta.0 <- min(y3) * 0.5  

### Estimate the rest parameters using a linear model
model.0 <- lm(log(y3 - theta.0) ~ x3)  
alpha.0 <- exp(coef(model.0)[1])
beta.0 <- coef(model.0)[2]

### Starting parameters
start <- list(alpha = alpha.0, beta = beta.0)
c2 <- nls(y3 ~ alpha * exp(beta * x3) ,  start = start)

### Summary all AICs and BICs
AIC(c1,c2)
##    df        AIC
## c1  3  -7.792185
## c2  3 -11.601528
#   df        AIC
# c1  3  -7.792185
# c2  4 -11.675159

BIC(c1,c2)
##    df        BIC
## c1  3  -8.416907
## c2  3 -12.226250
#   df        BIC
#c1  3  -8.416907
#c2  4 -12.2262

# ==> Model c2 selected based on lower AIC and BIC

## Plot predicted vs measured values
plot(x3,y3)
lines(x3, predict(c2, list(x = x3)), col = 'skyblue', lwd = 3)

# Extract parameter estimates
summary(c2)
## 
## Formula: y3 ~ alpha * exp(beta * x3)
## 
## Parameters:
##        Estimate Std. Error t value Pr(>|t|)   
## alpha  2.091772   0.308667   6.777  0.00247 **
## beta  -0.006480   0.000862  -7.518  0.00168 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06836 on 4 degrees of freedom
## 
## Number of iterations to convergence: 5 
## Achieved convergence tolerance: 3.111e-06
## alpha: 2.09, beta: -0.0065

# Model validation for c2
## Creat an evironment in which the validation plot can be saved at high resolution
#output_file <- "decay_G322D_fitting.jpeg"
#jpeg(filename = output_file, width = 8, height = 4, units = "in", res = 600)

# Plot setup
par(mfrow=c(1,2))
## qqplot
qqPlot(residuals(c2))
## [1] 5 3
## residuals vs fitted values plot
plot(residuals(c2)~fitted(c2))
abline(a=0, b=0, col="red")

#dev.off()

Fitting decay curve to F(S4247*)

# Visualize decay using a dotplot
ggplot(data1[data1$Genotype=="S427*",], aes(Sampling_time_min,Ratio))+
  geom_point()

# remove outlier t210
x4=data1[data1$Genotype=="S427*"&data1$Sampling_time_min!="210",]$Sampling_time_min
y4=data1[data1$Genotype=="S427*"& data1$Sampling_time_min!="210",]$Ratio

plot(x4,y4)

# Model selection between linear and exponential decay model
## Linear model
d1=lm(y4~x4)

## Exponential decay model: Select appropriate starting values; Select an approximate $\theta$, since theta must be lower than min(y), and greater than zero
theta.0 <- min(y4) * 0.5  

### Estimate the rest parameters using a linear model
model.0 <- lm(log(y4 - theta.0) ~ x4)  
alpha.0 <- exp(coef(model.0)[1])
beta.0 <- coef(model.0)[2]

### Starting parameters
start <- list(alpha = alpha.0, beta = beta.0)
d2 <- nls(y4 ~ alpha * exp(beta * x4) ,  start = start)

### Model comparison to chose the best fit model using AIC and BIC
AIC(d1,d2)
##    df       AIC
## d1  3 -3.548757
## d2  3 -5.901902
#  df       AIC
#d1  3 -3.548757
#d2  3 -5.901902

BIC(d1,d2)
##    df       BIC
## d1  3 -4.173478
## d2  3 -6.526623
# df       BIC
# d1  3 -4.173478
# d2  3 -6.526623

# ==> Model d2 selected based on the lower AIC and BIC

## Extract parameter estimates
summary(d2) 
## 
## Formula: y4 ~ alpha * exp(beta * x4)
## 
## Parameters:
##        Estimate Std. Error t value Pr(>|t|)  
## alpha  1.745038   0.407150   4.286   0.0128 *
## beta  -0.005442   0.001313  -4.146   0.0143 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1099 on 4 degrees of freedom
## 
## Number of iterations to convergence: 6 
## Achieved convergence tolerance: 2.651e-06
## alpha: 1.75, beta: -0.0054

# Model validation for d2
## Create an evironment to save validation plots at high resolution
#output_file <- "decay_S427_fitting.jpeg"
#jpeg(filename = output_file, width = 8, height = 4, units = "in", res = 600)

## Plot layout
par(mfrow=c(1,2))

## qqplot
qqPlot(residuals(d2))
## [1] 3 6
## residuals vs fitted values plot
plot(residuals(d2)~fitted(d2))
abline(a=0, b=0, col="red")

#dev.off()

Visualize decay measured and predicted values for all genotypes in one plot

(Figure Fig. 5B)

# Visualize decay data using dot and line plots
a=ggplot()+
  geom_point(aes(x=x1,y=y1),color="#252525")+ # measured decay ancestor
    geom_point(aes(x=x2,y=y2),color="#ae017e")+ # measured decay F(T101A)
  geom_point(aes(x=x3,y=y3),color="#c6d325")+ # measured decay F(G322D)
  geom_point(aes(x=x4,y=y4),color="#ef7c00")+ # measured decay F(S427*)
annotate(geom="text", x=250, y=1.00, hjust=0, label=expression("Decay rate (min"^{-1}*"):"),
         color="black", fontface="bold", size=5)+ # Decay rate text
  annotate(geom="text", x=250, y=0.92, hjust=0,label=expression("F(G322D): 6.5*10"^{-3}*""),
         color="#c6d325", fontface="bold", size=5)+ # Decay rate text F(G322D)
  annotate(geom="text", x=250, y=0.85, hjust=0,label=expression("F(S427*): 5.4*10"^{-3}*""),
         color="#ef7c00", fontface="bold", size=5)+# Decay rate text F(S427*)
  annotate(geom="text", x=250, y=0.78, hjust=0,label=expression("F(T101A): 2.0*10"^{-2}*""),
         color="#ae017e", fontface="bold", size=5)+# Decay rate text F(T101A)
  annotate(geom="text", x=250, y=0.71, hjust=0,label=expression("Ancestor: 2.1*10"^{-2}*""),
         color="#252525", fontface="bold", size=5)+ # Decay rate text ancestor
  geom_line(aes(x=x1, y=predict(a2)),color="#252525", size=0.75)+ # predicted decay ancestor
    geom_line(aes(x=x2, y=predict(b2)),color="#ae017e", size=0.75)+ # predicted decay F(T101A)
  geom_line(aes(x=x3, y=predict(c2)),color="#c6d325", linetype="dashed",size=0.75)+ # predicted decay F(G322D)
  geom_line(aes(x=x4, y=predict(d2)),color="#ef7c00", linetype="dashed",size=0.75)+ # predicted decay F(S427*)
  xlab("Elapsed time since infection (min)")+ # label x-axis
  ylab("Remaining fraction of infectious phage particles")+ # label y-axis
  theme_bw()+ # plot layout
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=14)) # font size
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
a
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

# Save plot at high resolution
# ggsave("decay_plot.jpeg", plot = a, dpi = 600, width = 9, height = 7)

Experiment 7: One-step growth curve of ancestral phage

Code used to generate Fig. S3 in the manuscript

READ ME Specification of columns in data set

Load data set

df=read.csv("experiment_7_OSGC_WT.csv")

# df has 11 observations and 16 variables

Checking data set

str(df) # structure of  columns
## 'data.frame':    11 obs. of  16 variables:
##  $ Genotype         : chr  "WT" "WT" "WT" "WT" ...
##  $ MOI              : num  0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ...
##  $ Sampling_time_min: int  1 2 3 4 5 6 8 10 15 20 ...
##  $ PFU_1            : num  17 26 6 12 134 ...
##  $ PFU_2            : int  11 18 NA 11 148 119 23 33 15 16 ...
##  $ PFU_3            : int  NA NA NA NA 136 129 14 65 18 21 ...
##  $ Median_PFU       : num  14 22 6 11.5 136 129 23 65 18 21 ...
##  $ Dilution_PFU     : num  2.5 2.5 3.3 5.0 1.0e+01 1.0e+02 1.0e+04 1.0e+04 1.0e+05 1.0e+05 ...
##  $ Dilution_OSGC    : int  2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
##  $ PFU_ml           : num  3.50e+01 5.50e+01 1.98e+01 5.75e+01 1.36e+03 1.29e+04 2.30e+05 6.50e+05 1.80e+06 2.10e+06 ...
##  $ t0_PFU_ml        : num  35 35 35 35 35 35 35 35 35 35 ...
##  $ PFU_t0_norm      : num  0 20 -15.2 22.5 1330 12900 230000 650000 1800000 2100000 ...
##  $ Bacteria_CFU_ml  : num  2.81e+08 2.81e+08 2.81e+08 2.81e+08 2.81e+08 2.81e+08 2.81e+08 2.81e+08 2.81e+08 2.81e+08 ...
##  $ Infected_CFU_ml  : num  14100 14100 14100 14100 14100 14100 14100 14100 14100 14100 ...
##  $ PFU_ml_produced  : num  0 0 0 0 0.09 ...
##  $ Elapsed_time     : int  7 8 9 10 11 12 14 16 21 26 ...

Visualize phage concentration for each time point

ggplot(df, aes(Elapsed_time,PFU_ml_produced))+
  geom_point(size=2)+ # dot plot
     scale_y_continuous(trans = scales::pseudo_log_trans(),
                     breaks = c(0, 10^(0:6)))+ # log transformed y axis
  xlim(0,35)+ # set x limits so time point 0-6 min (adsorption time) are also visualized
  theme_bw() # plot layout

Model fitting to get parameter estimates for lysis time and burst size

# Subset data frame to columns "Elapsed time" and "PFU_ml_produced"
WT=df[,c(16,15)]

## For model fitting, first compare exponential growth with logistic growth model to determine "best-fit" model

### Exponential increase model (beta= constant, alpha=growth rate)
model1<-nls(PFU_ml_produced~(beta+exp(alpha*Elapsed_time)),
            data=WT, start=list(beta=max(WT$PFU_ml_produced),alpha=0.5))

### Logistic growth model
model2 <- nls(PFU_ml_produced ~ SSlogis(Elapsed_time, b, l, scal), data=WT,
              start = list(b=max(WT$PFU_ml_produced),l=mean(WT$Elapsed_time),scal=1))

## Model comparison based on AIC and BIC
AIC(model1,model2)
##        df       AIC
## model1  3 117.03264
## model2  4  73.82167
#   df       AIC
#model1  3 117.03
#model2  4  73.82

BIC(model1,model2)
##        df       BIC
## model1  3 118.22633
## model2  4  75.41325
 #df       BIC
#model1  3 118.23
#model2  4 75.41

### --> Select model 2 [logistic growth] due to the lower AIC and BIC value ###

## Extract parameters
summary(model2)
## 
## Formula: PFU_ml_produced ~ SSlogis(Elapsed_time, b, l, scal)
## 
## Parameters:
##      Estimate Std. Error t value Pr(>|t|)    
## b    138.0512     3.8516  35.842 4.02e-10 ***
## l     17.0011     0.3487  48.757 3.46e-11 ***
## scal   1.4321     0.2583   5.545 0.000544 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.653 on 8 degrees of freedom
## 
## Number of iterations to convergence: 6 
## Achieved convergence tolerance: 8.576e-06
#Parameters:
#b    138.0512     3.8516  35.842 4.02e-10 ***
#l     17.0011     0.3487  48.757 3.46e-11 ***

## Results##
# burst size b = 138, lysis time l= 17 minutes

## Model validation using a qqplot and residuals vs fitted plot (Supplementary S3 B and C)

# Set the output file name to export plots at high resolution
output_file <- "model_check_OSGC.jpeg"

# Open a PNG device
#jpeg(filename = output_file, width = 8, height = 4, units = "in", res = 600)

#par(mfrow=c(1,2))

qqPlot(residuals(model2)) # looks good

## [1] 10 11
plot(residuals(model2)~fitted(model2))
abline(a=0, b=0, col="red") # ok

#dev.off()


## Visualize model fit for one-step growth curve (Supplementary S3 A)
x=ggplot()+
    geom_point(data=WT,aes(x=Elapsed_time,y=PFU_ml_produced), color="#252525")+ #raw data of produced phages per time step
    geom_line(data=WT,aes(x=Elapsed_time,y=predict(model2)),color="#252525", linetype="dashed",size=0.8)+ # model fit of model 2
annotate(geom="text", x=6, y=140, hjust=0,label="lysis time: 17 min",
              color="black")+ # add lysis time value as text
  annotate(geom="text", x=6, y=130, hjust=0,label="burst size: 138 per host cell",
              color="black")+ # add burst size value as text
  xlab("Elased time since adsorption initiation (min)")+ # label x-axis
  ylab("Produced infectious phage particles per ml")+ # label y-axis
  theme_bw()+ # plot layout
   theme(axis.text=element_text(size=12),
        axis.title=element_text(size=14)) # font size

x

# Export one step plot at high resolution
# ggsave("OSGC_WT.jpeg", plot = x, device = "jpeg", dpi = 600, width = 8, height = 6)